Preparations

Load libraries and functions

library("cluster")
library("dendextend")
## 
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
source("functions.R")
## Loading required package: ggplot2

Corpus description and selection

Load data

# The frequency table is read from a cached CSV. The stylo calls that
# apparently produced it are kept here, commented out, for reference:
# data = stylo::load.corpus.and.parse(corpus.dir = "dh-meier-data/output/transkribus/tokenized/boudams/", features = "w", ngram.size = 1, preserve.case = FALSE)
# data = stylo::make.table.of.frequencies(corpus = data, features = unique(sort(unlist(data))), relative = FALSE)
# write.csv(as.matrix(data), "data/transkr_expanded_words.csv")
# Read the cached table and transpose it in a single step.
data <- t(read.csv("data/transkr_expanded_words.csv", header = TRUE, row.names = 1))

Text lengths

# Column totals of the transposed table: one word count per text.
nwords <- colSums(data)
summary(nwords)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     298    2244    3539    5070    6774   18971
# Draw the boxplot once and keep its return value, so the outlying text
# lengths can be listed without rendering the same plot a second time.
nwordsBox <- boxplot(nwords)
nwordsBox$out

## 05_Ano_Leg-A_Ap_NA_Vie_Jacques  29_Wau_Leg-C_Co_Ev_Vie_Martin 
##                          17920                          14432 
## 31_Wau_Leg-C_Co_Ev_Dia_Martin3 34_Wau_Leg-C_Co_Ev_Vie_Martial 
##                          18971                          15255
# The 15 smallest totals, in increasing order.
head(sort(nwords, decreasing = FALSE), n = 15L)
##          03_Ano_Leg-A_Ap_NA_Mar_Jean          62_Ano_Leg-N_NA_NA_NA_Index 
##                                  298                                  301 
##       61_Ano_Leg-B_NA_NA_NA_Jugement       30_Wau_Leg-C_Co_Ev_Tra_Martin2 
##                                  406                                  722 
##      08_Ano_Leg-A_Ap_NA_Vie_Philippe     59_Ano_Leg-C_Vi_NA_Vie_Euphrasie 
##                                 1014                                 1293 
## 09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur         32_Wau_Leg-C_Co_Ev_Vie_Brice 
##                                 1356                                 1385 
##    60_Ano_Leg-B_NA_NA_NA_Antechriste       54_Ano_Leg-C_Vi_NA_Vie_Pelagie 
##                                 1485                                 1506 
##      20_Ano_Leg-B_Ma_Fe_Vie_Felicite          11_Ano_Leg-A_Ap_NA_Vie_Marc 
##                                 1676                                 1820 
##         23_Ano_Leg-B_Ma_Ho_Vie_Sixte    53_Ano_Leg-C_Vi_NA_Vie_Marguerite 
##                                 1894                                 1935 
##       35_Wau_Leg-C_Co_Ev_Vie_Nicolas 
##                                 1960
# Texts retained for every later analysis: more than 1000 words, and a name
# that does not contain "Bestiaire".
longEnough <- nwords > 1000
toKeep <- colnames(data)[longEnough]
toKeep <- grep("Bestiaire", toKeep, invert = TRUE, value = TRUE)

# Distribution of text lengths (all texts, before the filter above).
df <- as.data.frame(nwords)

ggplot(df, aes(x = "", y = nwords)) +
  geom_violin() +
  geom_boxplot(width = 0.3) +
  theme(
    axis.text.y = element_text(size = rel(1.4)),
    axis.title = element_text(size = rel(1.4))
  ) +
  xlab("Est. length in words of corpus texts") +
  scale_y_continuous(breaks = seq(0, 17500, by = 2500))

Transkribus raw data

3-grams from raw data

Load data

# The character 3-gram table is read from a cached CSV. The stylo calls that
# apparently produced it are kept here, commented out, for reference:
# data = stylo::load.corpus.and.parse(corpus.dir = "dh-meier-data/output/transkribus/raw/", features = "c", ngram.size = 3, preserve.case = FALSE)
# data = stylo::make.table.of.frequencies(corpus = data, features = unique(sort(unlist(data))), relative = FALSE)
# write.csv(as.matrix(data), "data/transkr_raw_char3grams.csv")
data <- t(read.csv("data/transkr_raw_char3grams.csv", header = TRUE, row.names = 1))
# Restrict to the retained texts, then drop features that no longer occur.
data <- data[, toKeep]
data <- data[rowSums(data) > 0, ]

Burrows + vector-length norm

d <- data
# Selection based on Moisl 2011; column 4 of the result is used as the row
# selector below.
select <- selection(d, z = 1.645)[, 4]
# Copy kept for robustness checks.
# NOTE(review): this copy is taken BEFORE the `select` filter, whereas the
# other *Save tables in this file are taken after it -- confirm intended.
Raw3grSave <- relativeFreqs(d)
# Normalisations on the selected features only.
d <- normalisations(Raw3grSave[select, ])
# Agglomerative clustering of the texts (columns, hence t()):
# Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHRaw3gr <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotRaw3grams <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Characters 3-grams from raw data (Transkr)"
)

# somCAH = somCluster(d)
# somCAHRaw3gr = somCAH
# somplotRaw3grams = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Characters 3-grams from raw data (Transkr)")

Class descriptions, Wauchier and StLambert

Classes members

# Cut the current tree (myCAH) into 9 classes and list the membership.
classes <- cutree(myCAH, k = 9)
classes
##         00_Ano_Leg-A_Ap_Ev_Dis_Pierre1         01_Ano_Leg-A_Ap_NA_Vie_Pierre2 
##                                      1                                      1 
##            02_Ano_Leg-A_Ap_NA_Pas_Paul         04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev 
##                                      1                                      1 
##         05_Ano_Leg-A_Ap_NA_Vie_Jacques        06_Ano_Leg-A_Ap_NA_Vie_Matthieu 
##                                      1                                      2 
##       07_Ano_Leg-A_Ap_NA_Vie_SimonJude        08_Ano_Leg-A_Ap_NA_Vie_Philippe 
##                                      2                                      2 
##   09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy 
##                                      2                                      2 
##            11_Ano_Leg-A_Ap_NA_Vie_Marc          12_Ano_Leg-A_Ma_Ho_Vie_Longin 
##                                      2                                      3 
##       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien         14_Ano_Leg-B_Ma_Ho_Vie_Vincent 
##                                      3                                      3 
##         15_Ano_Leg-B_Ma_Ho_Vie_Georges      16_Ano_Leg-B_Ma_Ho_Vie_Christophe 
##                                      3                                      3 
##          17_Ano_Leg-B_Ma_Fe_Vie_Agathe            18_Ano_Leg-B_Ma_Fe_Vie_Luce 
##                                      3                                      3 
##           19_Ano_Leg-B_Ma_Fe_Vie_Agnes        20_Ano_Leg-B_Ma_Fe_Vie_Felicite 
##                                      3                                      3 
##       21_Ano_Leg-B_Ma_Fe_Vie_Christine          22_Ano_Leg-B_Ma_Fe_Vie_Cecile 
##                                      3                                      3 
##           23_Ano_Leg-B_Ma_Ho_Vie_Sixte         24_Ano_Leg-B_Ma_Ho_Vie_Laurent 
##                                      4                                      4 
##       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte         26_Ano_Leg-B_Ma_Ev_Vie_Lambert 
##                                      4                                      5 
##       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon         28_Ano_Leg-B_Ma_Ho_Vie_Clement 
##                                      3                                      6 
##          29_Wau_Leg-C_Co_Ev_Vie_Martin         31_Wau_Leg-C_Co_Ev_Dia_Martin3 
##                                      5                                      5 
##           32_Wau_Leg-C_Co_Ev_Vie_Brice          33_Wau_Leg-C_Co_Er_Vie_Gilles 
##                                      5                                      5 
##         34_Wau_Leg-C_Co_Ev_Vie_Martial         35_Wau_Leg-C_Co_Ev_Vie_Nicolas 
##                                      5                                      5 
##        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3 
##                                      5                                      5 
##          38_Wau_Leg-C_Co_Ev_Vie_Jerome          39_Wau_Leg-C_Co_Ev_Vie_Benoit 
##                                      5                                      5 
##          40_Wau_Leg-C_Co_Er_Vie_Alexis           41_Ano_Leg-C_Vi_NA_Vie_Irene 
##                                      5                                      6 
##       42_Ano_Leg-B_Vi_NA_Ass_NotreDame       43_Ano_Leg-C_Vi_NA_Vie_Catherine 
##                                      7                                      7 
##           44_Ano_Leg-C_Ap_NA_Vie_Andre          45_Ano_Leg-C_Ap_NA_Pas_Andre2 
##                                      7                                      7 
##         46_Ano_Leg-B_Co_NA_Pur_Patrice      47_Ano_Leg-C_Co_er_Vie_PaulErmite 
##                                      8                                      8 
##         48_Ano_Leg-C_Co_ev_Tra_Benoit2            49_Ano_Leg-C_NA_NA_Vie_Maur 
##                                      8                                      8 
##         50_Ano_Leg-C_NA_NA_Vie_Placide        51_Ano_Leg-C_Ma_ho_Vie_Eustache 
##                                      8                                      8 
##           52_Ano_Leg-C_Co_NA_Vie_Fursi      53_Ano_Leg-C_Vi_NA_Vie_Marguerite 
##                                      8                                      9 
##         54_Ano_Leg-C_Vi_NA_Vie_Pelagie          55_Ano_Leg-C_Co_NA_Vie_Simeon 
##                                      9                                      8 
##        56_Ano_Leg-C_Co_NA_Vie_Mamertin          57_Ano_Leg-C_Vi_NA_Vie_Julien 
##                                      9                                      6 
## 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie 
##                                      9                                      9 
##      60_Ano_Leg-B_NA_NA_NA_Antechriste 
##                                      7
# Display labels for the 9 classes of the raw 3-gram clustering, in class
# order (class 5 holds the Wau texts in the listing above).
# NOTE(review): positions 7 and 9 carry the labels "9 (C??)" and "7 (C3)";
# presumably deliberate -- confirm.
classlabels <- c(
  "1 (A1)", "2 (A2)", "3 (B)",
  "4 (B2)", "5 (WAU)", "6 (C?)",
  "9 (C??)", "8 (C2)", "7 (C3)"
)

Classes description

Features most strongly correlated with the classification overall
# Describe the 9 classes with the normalised features; show the first 20
# rows of the per-feature table (Eta2 and p-value in the output below).
maDesc <- classesDesc(myCAH, d, k = 9)
head(maDesc[["quanti.var"]], n = 20L)
##             Eta2      P-value
## m.m.e  0.9363350 3.392896e-27
## o.m.m  0.8421549 1.806011e-17
## e.i.g  0.8159344 7.690754e-16
## q.u.e  0.8088302 1.933238e-15
## l.s..  0.7886573 2.208904e-14
## i.h.e  0.7885668 2.231930e-14
## c.i.u  0.7856957 3.094611e-14
## X..i.h 0.7741352 1.103312e-13
## f.a.i  0.7616974 4.024098e-13
## X..a.. 0.7584433 5.579947e-13
## d.e..  0.7545781 8.178504e-13
## X..q.u 0.7502560 1.244849e-12
## q.i.l  0.7464034 1.798744e-12
## X..u.n 0.7397926 3.337237e-12
## X..f.a 0.7375939 4.083879e-12
## a.i.n  0.7371363 4.258191e-12
## i.e.s  0.7367998 4.390874e-12
## e.l.s  0.7255644 1.195223e-11
## m.m.a  0.7223610 1.577593e-11
## s.a.r  0.7211243 1.754444e-11
# Violin plots, per class, of the six features ranked highest above.
# The plots are collected in a list instead of variables A..F: assigning to
# `F` overrides R's shorthand for FALSE for the rest of the session (and `C`
# / `D` shadow stats::C / stats::D). relativeFreqs(data) is also computed
# once here rather than once per plot.
relFreqs <- relativeFreqs(data)
# Plot title -> row name in the frequency table.
topFeats <- c(
  "mme" = "m.m.e",
  "omm" = "o.m.m",
  "eig" = "e.i.g",
  "que" = "q.u.e",
  "ls_" = "l.s..",
  "ihe" = "i.h.e"
)
descPlots <- lapply(names(topFeats), function(title) {
  myDescPlot(
    relFreqs[topFeats[[title]], , drop = FALSE],
    classes,
    type = "violinplot",
    main = title,
    ylab = "Relative frequency",
    xlab = "",
    classlabels = classlabels
  )
})
gridExtra::grid.arrange(grobs = descPlots, ncol = 2)

# Kept for later use: violin plot of the "q.i.l" row.
WauFeats1 <- myDescPlot(
  relativeFreqs(data)["q.i.l", , drop = FALSE],
  classes,
  type = "violinplot",
  main = "qil",
  ylab = "Relative frequency",
  xlab = "",
  classlabels = classlabels
)
Features and clusters (v-test, distribution,…): Wauchier
nfeats <- 10
# First column of the description table for class 5 (the class holding the
# Wau texts in the listing above); presumably the v-test, as plotted below.
vtest <- maDesc$quanti$`5`[, 1]
# The nfeats largest values followed by the nfeats smallest ones.
values <- c(
  head(sort(vtest, decreasing = TRUE), n = nfeats),
  head(sort(vtest), n = nfeats)
)
classBarplot(values, title = "V-test for Wauchier class", ylab = "v-test")

Example of the two main features of the Wauchier class

# Scatter plots of feature pairs, coloured by class, with the Lambert text
# singled out under its own "LAMB" level.
class <- as.factor(classes)
levels(class) <- classlabels
levels(class) <- c(levels(class), "LAMB")
class["26_Ano_Leg-B_Ma_Ev_Vie_Lambert"] <- "LAMB"
# One row per text, one column per feature, plus the class factor.
rf <- cbind(as.data.frame(t(relativeFreqs(data))), class)
# ggplot() + geom_point() replaces qplot(), which ggplot2 has deprecated;
# it draws the same point layer with the same default axis labels.
ggplot(rf, aes(x = q.i.l, y = o.m.., colour = class)) + geom_point()

ggplot(rf, aes(x = e.i.n, y = q.i.l, colour = class)) + geom_point()

Specificities
# Specificities plot for the raw 3-gram table and the current 9-class tree.
specifPlot(
  data,
  myCAH,
  k = 9,
  classlabels = classlabels
)

Transkribus expanded data

Load data

# Expanded word forms: read, transpose, restrict to the retained texts and
# drop forms that no longer occur.
data <- t(read.csv("data/transkr_expanded_words.csv", header = TRUE, row.names = 1))
data <- data[, toKeep]
data <- data[rowSums(data) > 0, ]
# Keep the word table under its own name; `data` is reused below.
dataWords <- data

Forms from expanded data

Burrows + vector-length norm

d <- data
# Selection based on Moisl 2011; column 4 of the result is the row selector.
select <- selection(d, z = 1.645)[, 4]
# Relative frequencies of the selected forms, kept for robustness checks.
WordsSave <- relativeFreqs(d)[select, ]
d <- normalisations(WordsSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHForms <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotForms <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Expanded word forms (Transkr/Boudams/Pie)"
)

#somCAH = somCluster(d)
#somCAHForms = somCAH
#somplotForms = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Expanded word forms (Transkr/Boudams/Pie)")

Affixes from expanded data

# Build the affix table from the full word table loaded above.
dataAffs <- countAffixes(data)

Burrows + vector-length norm

d <- dataAffs
# Selection based on Moisl 2011; column 4 of the result is the row selector.
select <- selection(d, z = 1.645)[, 4]
# Relative frequencies of the selected affixes, kept for later reuse.
AffixesSave <- relativeFreqs(d)[select, ]
d <- normalisations(AffixesSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHAffs <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotAffixes <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Expanded affixes (Transkr/Boudams/Pie)"
)
#somCAH = somCluster(d)
#somCAHAffs = somCAH
#somplotAffixes = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Expanded affixes (Transkr/Boudams/Pie)")

Unstandardised function words from expanded data

Create function words list

#labels(sort(rowSums(data), decreasing = TRUE)[1:300])
# With or without pronouns?
# The list is whatever the last expression of functionWords.R evaluates to.
functionWords <- source("functionWords.R")[["value"]]
# NOTE(review): despite its name, dataFW is the FULL word table here, not the
# subset of rows named in functionWords -- confirm this is intended.
dataFW <- data

Burrows + vector-length norm

# Relative frequencies restricted to the function-word rows; no Moisl
# selection is applied to this feature set. Kept for robustness checks.
FWSave <- relativeFreqs(data)[functionWords, ]
d <- normalisations(FWSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHFW <- myCAH
# barplot(sort(myCAH$height))
# This tree is cut into 8 classes, not 9.
plotFW <- cahPlotCol(
  myCAH,
  k = 8,
  main = "Function words with pronouns and auxiliaries\n(Transkr/Boudams/Pie)"
)
#plotCol(myCAH, main = "toto")
#somCAH = somCluster(d)
#somCAHFW = somCAH
#somplotFW = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Function words")

Transkribus with linguistic annotation

POS 3-grams

# POS 3-gram table: semicolon-separated, texts in columns.
data <- read.csv("data/transkr_pos3-gr.csv", header = TRUE, row.names = 1, sep = ";")
# Remove the total-frequency column.
data <- data[, -1]
# Rewrite the column names so that they match the text names in toKeep.
colnames(data) <- gsub("^X", "", colnames(data))
# Left as a regular expression on purpose: the leading "." matches whatever
# single character precedes "decolumnized" in the names.
colnames(data) <- gsub(".decolumnized", "", colnames(data))
# Literal match: as a regex, "Leg." would also rewrite "Leg" followed by any
# other character (e.g. a name containing "Leger"), corrupting that name.
colnames(data) <- gsub("Leg.", "Leg-", colnames(data), fixed = TRUE)
data <- data[, toKeep]
data <- data[rowSums(data) > 0, ]
data <- as.matrix(data)

Burrows + vector-length norm

# Keep the raw POS 3-gram table; `data` is reused below.
dataPOS3gr <- data
d <- data
# Selection based on Moisl 2011; the full result is written to disk before
# its column 4 is taken as the row selector.
select <- selection(d, z = 1.645)
write.csv(select, file = "data/select_pos3gr_moisl.csv")
select <- select[, 4]
# Relative frequencies of the selected 3-grams, kept for robustness checks.
POS3grSave <- relativeFreqs(d)[select, ]
d <- normalisations(POS3grSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHPOS3gr <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotPOS3grams <- cahPlotCol(
  myCAH,
  k = 9,
  main = "POS 3-grams (Transkr/Boudams/Pie/Pie)"
)
#somCAH = somCluster(d)
#somCAHPOS3gr = somCAH
#somplotPOS3grams = cahPlotCol(somCAH, k = 9, main = "SOM BASED - POS 3-grams")

Lemmas

# Lemma table: semicolon-separated, texts in columns.
data <- read.csv("data/transkr_lemmas.csv", header = TRUE, row.names = 1, sep = ";")
# Remove the total-frequency column.
data <- data[, -1]
# Rewrite the column names so that they match the text names in toKeep.
colnames(data) <- gsub("^X", "", colnames(data))
# Left as a regular expression on purpose: the leading "." matches whatever
# single character precedes "decolumnized" in the names.
colnames(data) <- gsub(".decolumnized", "", colnames(data))
# Literal match: as a regex, "Leg." would also rewrite "Leg" followed by any
# other character (e.g. a name containing "Leger"), corrupting that name.
colnames(data) <- gsub("Leg.", "Leg-", colnames(data), fixed = TRUE)
data <- data[, toKeep]
data <- data[rowSums(data) > 0, ]
data <- as.matrix(data)
# Keep the lemma table under its own name; `data` is reused below.
dataLemmas <- data

Burrows + vector-length norm

d <- data
# Selection based on Moisl 2011; the full result is written to disk before
# its column 4 is taken as the row selector.
select <- selection(d, z = 1.645)
write.csv(select, file = "data/select_lemmas_moisl.csv")
select <- select[, 4]
# Relative frequencies of the selected lemmas, kept for later reuse.
LemmasSave <- relativeFreqs(d)[select, ]
d <- normalisations(LemmasSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHLemmas <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotLemmas <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Lemmas (Transkr/Boudams/Pie/Pie)"
)
#somCAH = somCluster(d)
#somCAHLemmas = somCAH
#somplotLemmas = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Lemmas")

Function words from lemmas

# Function lemmas: the list is whatever the last expression of
# functionLemmas.R evaluates to.
#rownames(data)[1:250]
functionLemmas <- source("functionLemmas.R")[["value"]]

Burrows + vector-length norm

# Relative frequencies restricted to the function-lemma rows; no Moisl
# selection is applied to this feature set. Kept for later reuse.
FLSave <- relativeFreqs(data)[functionLemmas, ]
d <- normalisations(FLSave)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHFL <- myCAH
# barplot(sort(myCAH$height))
# This tree is cut into 8 classes, not 9.
plotFL <- cahPlotCol(
  myCAH,
  k = 8,
  main = "Function Lemmas with pronouns and auxiliaries\n(Transkr/Boudams/Pie)"
)
#plotCol(myCAH, main = "toto")
#somCAH = somCluster(d)
#somCAHFL = somCAH
#somplotFL = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Function words (lemmas)")

Affixes + POS 3-gr + Function words (lemmas)

# Stack the three saved relative-frequency tables row-wise, then normalise
# and cluster the texts (Manhattan metric, Ward method).
data <- rbind(AffixesSave, POS3grSave, FLSave)
d <- normalisations(data)
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHGlob <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotGlob <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Affixes + POS 3- grams + Function words (lemmas)"
)
#somCAH = somCluster(d)
#somCAHGlob = somCAH
#somplotGlob = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Affixes + POS 3- grams + Function words (lemmas)")

Affixes + POS 3-gr + Function words (unnorm)

# Same combination as above, with the word-form function words (FWSave) in
# place of the function lemmas.
data <- rbind(AffixesSave, POS3grSave, FWSave)
d <- normalisations(data)
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHGlob2 <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotGlob2 <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Affixes + POS 3- grams + Function words (unnorm.)"
)
#somCAH = somCluster(d)
#somCAHGlob2 = somCAH
#somplotGlob2 = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Affixes + POS 3- grams + Function words (unnorm.)")

Class descriptions, Wauchier and StLambert

Classes members

# Cut the current tree (myCAH) into 9 classes and list the membership.
classes <- cutree(myCAH, k = 9)
classes
##         00_Ano_Leg-A_Ap_Ev_Dis_Pierre1         01_Ano_Leg-A_Ap_NA_Vie_Pierre2 
##                                      1                                      1 
##            02_Ano_Leg-A_Ap_NA_Pas_Paul         04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev 
##                                      1                                      1 
##         05_Ano_Leg-A_Ap_NA_Vie_Jacques        06_Ano_Leg-A_Ap_NA_Vie_Matthieu 
##                                      2                                      2 
##       07_Ano_Leg-A_Ap_NA_Vie_SimonJude        08_Ano_Leg-A_Ap_NA_Vie_Philippe 
##                                      2                                      2 
##   09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy 
##                                      2                                      2 
##            11_Ano_Leg-A_Ap_NA_Vie_Marc          12_Ano_Leg-A_Ma_Ho_Vie_Longin 
##                                      3                                      3 
##       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien         14_Ano_Leg-B_Ma_Ho_Vie_Vincent 
##                                      4                                      4 
##         15_Ano_Leg-B_Ma_Ho_Vie_Georges      16_Ano_Leg-B_Ma_Ho_Vie_Christophe 
##                                      3                                      4 
##          17_Ano_Leg-B_Ma_Fe_Vie_Agathe            18_Ano_Leg-B_Ma_Fe_Vie_Luce 
##                                      5                                      5 
##           19_Ano_Leg-B_Ma_Fe_Vie_Agnes        20_Ano_Leg-B_Ma_Fe_Vie_Felicite 
##                                      5                                      3 
##       21_Ano_Leg-B_Ma_Fe_Vie_Christine          22_Ano_Leg-B_Ma_Fe_Vie_Cecile 
##                                      5                                      4 
##           23_Ano_Leg-B_Ma_Ho_Vie_Sixte         24_Ano_Leg-B_Ma_Ho_Vie_Laurent 
##                                      3                                      3 
##       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte         26_Ano_Leg-B_Ma_Ev_Vie_Lambert 
##                                      3                                      6 
##       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon         28_Ano_Leg-B_Ma_Ho_Vie_Clement 
##                                      3                                      7 
##          29_Wau_Leg-C_Co_Ev_Vie_Martin         31_Wau_Leg-C_Co_Ev_Dia_Martin3 
##                                      6                                      6 
##           32_Wau_Leg-C_Co_Ev_Vie_Brice          33_Wau_Leg-C_Co_Er_Vie_Gilles 
##                                      6                                      6 
##         34_Wau_Leg-C_Co_Ev_Vie_Martial         35_Wau_Leg-C_Co_Ev_Vie_Nicolas 
##                                      6                                      6 
##        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3 
##                                      6                                      6 
##          38_Wau_Leg-C_Co_Ev_Vie_Jerome          39_Wau_Leg-C_Co_Ev_Vie_Benoit 
##                                      6                                      6 
##          40_Wau_Leg-C_Co_Er_Vie_Alexis           41_Ano_Leg-C_Vi_NA_Vie_Irene 
##                                      6                                      7 
##       42_Ano_Leg-B_Vi_NA_Ass_NotreDame       43_Ano_Leg-C_Vi_NA_Vie_Catherine 
##                                      8                                      8 
##           44_Ano_Leg-C_Ap_NA_Vie_Andre          45_Ano_Leg-C_Ap_NA_Pas_Andre2 
##                                      8                                      8 
##         46_Ano_Leg-B_Co_NA_Pur_Patrice      47_Ano_Leg-C_Co_er_Vie_PaulErmite 
##                                      7                                      7 
##         48_Ano_Leg-C_Co_ev_Tra_Benoit2            49_Ano_Leg-C_NA_NA_Vie_Maur 
##                                      7                                      7 
##         50_Ano_Leg-C_NA_NA_Vie_Placide        51_Ano_Leg-C_Ma_ho_Vie_Eustache 
##                                      7                                      7 
##           52_Ano_Leg-C_Co_NA_Vie_Fursi      53_Ano_Leg-C_Vi_NA_Vie_Marguerite 
##                                      7                                      9 
##         54_Ano_Leg-C_Vi_NA_Vie_Pelagie          55_Ano_Leg-C_Co_NA_Vie_Simeon 
##                                      9                                      7 
##        56_Ano_Leg-C_Co_NA_Vie_Mamertin          57_Ano_Leg-C_Vi_NA_Vie_Julien 
##                                      9                                      7 
## 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie 
##                                      9                                      9 
##      60_Ano_Leg-B_NA_NA_NA_Antechriste 
##                                      8

Classes description

Features most strongly correlated with the classification overall
# Describe the 9 classes with the normalised features; show the first 20
# rows of the per-feature table (Eta2 and p-value in the output below).
maDesc <- classesDesc(myCAH, d, k = 9)
head(maDesc[["quanti.var"]], n = 20L)
##                           Eta2      P-value
## ls_                  0.8554356 2.098608e-18
## els^                 0.8099116 1.684037e-15
## que                  0.7980243 7.355341e-15
## qil                  0.7671960 2.291409e-13
## CONsub.PONfbl.PROper 0.7645207 3.018836e-13
## PONfrt.CONcoo.ADVgen 0.7579528 5.859415e-13
## CONcoo.ADVgen.VERcjg 0.7491702 1.381757e-12
## _un                  0.7386842 3.695641e-12
## _q                   0.7247222 1.286130e-11
## q_                   0.7247222 1.286130e-11
## q                    0.7204758 1.854589e-11
## $fai                 0.7162909 2.644950e-11
## CONcoo.PROper.VERcjg 0.7155295 2.819706e-11
## nte^                 0.7110078 4.107902e-11
## PONfbl.PROper.VERcjg 0.7098832 4.506502e-11
## om_                  0.7084025 5.087916e-11
## _fa                  0.7020561 8.496285e-11
## dont                 0.6938190 1.624822e-10
## $sai                 0.6917134 1.911954e-10
## ut_                  0.6801267 4.584636e-10
# Violin plots, per class, of the six features ranked highest above.
# The plots are collected in a list instead of variables A..F: assigning to
# `F` overrides R's shorthand for FALSE for the rest of the session (and `C`
# / `D` shadow stats::C / stats::D). relativeFreqs(data) is also computed
# once here rather than once per plot.
# NOTE(review): classlabels was written for the raw 3-gram clustering, where
# the Wau texts are class 5; in this clustering they are class 6, so the
# "5 (WAU)" label lands on a different class -- confirm / redefine.
# NOTE(review): `data` is here an rbind of tables that already went through
# relativeFreqs(); confirm that applying it again is intended.
relFreqs <- relativeFreqs(data)
# Plot title -> row name in the frequency table.
topFeats <- c(
  "ls_" = "ls_",
  "els^" = "els^",
  "que" = "que",
  "qil" = "qil",
  "CONsub.PONfbl.PROper" = "CONsub PONfbl PROper",
  "PONfrt.CONcoo.ADVgen" = "PONfrt CONcoo ADVgen"
)
descPlots <- lapply(names(topFeats), function(title) {
  myDescPlot(
    relFreqs[topFeats[[title]], , drop = FALSE],
    classes,
    type = "violinplot",
    main = title,
    ylab = "Relative frequency",
    xlab = "",
    classlabels = classlabels
  )
})
gridExtra::grid.arrange(grobs = descPlots, ncol = 2)

# Violin plots kept for later use. relativeFreqs(data) is computed once and
# shared by the four plots instead of being recomputed for each of them.
# NOTE(review): classlabels was written for the raw 3-gram clustering (Wau
# texts in class 5); here they are class 6 -- confirm / redefine.
# NOTE(review): `data` is here an rbind of tables that already went through
# relativeFreqs(); confirm that applying it again is intended.
relFreqs <- relativeFreqs(data)
# Plot one row of relFreqs, titled with the row name itself.
wauFeatPlot <- function(feat) {
  myDescPlot(
    relFreqs[feat, , drop = FALSE],
    classes,
    type = "violinplot",
    main = feat,
    ylab = "Relative frequency",
    xlab = "",
    classlabels = classlabels
  )
}
WauFeats2a <- wauFeatPlot("oi_")
WauFeats2b <- wauFeatPlot("tu")
WauFeats3 <- wauFeatPlot("com")
WauFeats4 <- wauFeatPlot("PONfbl PROper VERcjg")
Features and clusters (v-test, distribution,…): Wauchier
nfeats <- 10
# First column of the description table for class 6 (the class holding the
# Wau texts in the listing above); presumably the v-test, as plotted below.
vtest <- maDesc$quanti$`6`[, 1]
# The nfeats largest values followed by the nfeats smallest ones.
values <- c(
  head(sort(vtest, decreasing = TRUE), n = nfeats),
  head(sort(vtest), n = nfeats)
)
classBarplot(values, title = "V-test for Wauchier class", ylab = "v-test")

Example of two features of the Wauchier class

# Scatter plot of two features, coloured by class, with the Lambert text
# singled out under its own "LAMB" level.
# NOTE(review): classlabels was written for the raw 3-gram clustering (Wau
# texts in class 5); in this clustering they are class 6, so the legend
# labels may not describe these classes -- confirm / redefine.
class <- as.factor(classes)
levels(class) <- classlabels
levels(class) <- c(levels(class), "LAMB")
class["26_Ano_Leg-B_Ma_Ev_Vie_Lambert"] <- "LAMB"
# NOTE(review): `data` is here an rbind of tables that already went through
# relativeFreqs(); confirm that applying it again is intended.
rf <- cbind(as.data.frame(t(relativeFreqs(data))), class)
# ggplot() + geom_point() replaces qplot(), which ggplot2 has deprecated;
# it draws the same point layer with the same default axis labels.
ggplot(rf, aes(x = `PONfbl PROper VERcjg`, y = com, colour = class)) + geom_point()

Specificities
# Specificities are computed on the stacked tables saved earlier (not on the
# *Save relative frequencies used for the clustering).
# NOTE(review): dataFW was assigned the full word table, not only the
# function-word rows -- confirm this is the intended input.
data <- rbind(dataAffs, dataPOS3gr, dataFW)
specifPlot(
  data,
  myCAH,
  k = 9,
  classlabels = classlabels
)

Affixes + POS 3-gr + Function words (both)

# Combination with both function-word sets (word forms and lemmas).
data <- rbind(AffixesSave, POS3grSave, FWSave, FLSave)
d <- normalisations(data)
# Cluster the texts (columns, hence t()): Manhattan metric, Ward method.
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHGlob3 <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotGlob3 <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Affixes + POS 3- grams + Function words (both)"
)
#somCAH = somCluster(d)
#somCAHGlob3 = somCAH
#somplotGlob3 = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Affixes + POS 3- grams + Function words (unnorm.)")

Exp. word-forms+lemmas

# Stack the saved lemma and word-form tables row-wise, then normalise and
# cluster the texts (Manhattan metric, Ward method).
data <- rbind(LemmasSave, WordsSave)
d <- normalisations(data)
myCAH <- cluster::agnes(t(d), metric = "manhattan", method = "ward")
CAHWordsLemmas <- myCAH
#TODO: heights
# barplot(sort(myCAH$height))
plotWordsLemmas <- cahPlotCol(
  myCAH,
  k = 9,
  main = "Word forms + lemmas"
)
#somCAH = somCluster(d)
#somCAHWordsLemmas = somCAH
#somplotWordsLemmas = cahPlotCol(somCAH, k = 9, main = "SOM BASED - Word forms + lemmas")

Class descriptions, Wauchier and StLambert

Class members

# Cut the current clustering into 9 classes and display the class number
# assigned to each text.
classes <- cutree(
  myCAH,
  k = 9
)
classes
##         00_Ano_Leg-A_Ap_Ev_Dis_Pierre1         01_Ano_Leg-A_Ap_NA_Vie_Pierre2 
##                                      1                                      1 
##            02_Ano_Leg-A_Ap_NA_Pas_Paul         04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev 
##                                      1                                      1 
##         05_Ano_Leg-A_Ap_NA_Vie_Jacques        06_Ano_Leg-A_Ap_NA_Vie_Matthieu 
##                                      2                                      3 
##       07_Ano_Leg-A_Ap_NA_Vie_SimonJude        08_Ano_Leg-A_Ap_NA_Vie_Philippe 
##                                      3                                      3 
##   09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy 
##                                      3                                      3 
##            11_Ano_Leg-A_Ap_NA_Vie_Marc          12_Ano_Leg-A_Ma_Ho_Vie_Longin 
##                                      3                                      4 
##       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien         14_Ano_Leg-B_Ma_Ho_Vie_Vincent 
##                                      4                                      4 
##         15_Ano_Leg-B_Ma_Ho_Vie_Georges      16_Ano_Leg-B_Ma_Ho_Vie_Christophe 
##                                      4                                      4 
##          17_Ano_Leg-B_Ma_Fe_Vie_Agathe            18_Ano_Leg-B_Ma_Fe_Vie_Luce 
##                                      4                                      4 
##           19_Ano_Leg-B_Ma_Fe_Vie_Agnes        20_Ano_Leg-B_Ma_Fe_Vie_Felicite 
##                                      4                                      4 
##       21_Ano_Leg-B_Ma_Fe_Vie_Christine          22_Ano_Leg-B_Ma_Fe_Vie_Cecile 
##                                      4                                      4 
##           23_Ano_Leg-B_Ma_Ho_Vie_Sixte         24_Ano_Leg-B_Ma_Ho_Vie_Laurent 
##                                      5                                      5 
##       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte         26_Ano_Leg-B_Ma_Ev_Vie_Lambert 
##                                      5                                      6 
##       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon         28_Ano_Leg-B_Ma_Ho_Vie_Clement 
##                                      4                                      7 
##          29_Wau_Leg-C_Co_Ev_Vie_Martin         31_Wau_Leg-C_Co_Ev_Dia_Martin3 
##                                      6                                      6 
##           32_Wau_Leg-C_Co_Ev_Vie_Brice          33_Wau_Leg-C_Co_Er_Vie_Gilles 
##                                      8                                      6 
##         34_Wau_Leg-C_Co_Ev_Vie_Martial         35_Wau_Leg-C_Co_Ev_Vie_Nicolas 
##                                      6                                      8 
##        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3 
##                                      6                                      6 
##          38_Wau_Leg-C_Co_Ev_Vie_Jerome          39_Wau_Leg-C_Co_Ev_Vie_Benoit 
##                                      8                                      6 
##          40_Wau_Leg-C_Co_Er_Vie_Alexis           41_Ano_Leg-C_Vi_NA_Vie_Irene 
##                                      8                                      7 
##       42_Ano_Leg-B_Vi_NA_Ass_NotreDame       43_Ano_Leg-C_Vi_NA_Vie_Catherine 
##                                      1                                      1 
##           44_Ano_Leg-C_Ap_NA_Vie_Andre          45_Ano_Leg-C_Ap_NA_Pas_Andre2 
##                                      1                                      2 
##         46_Ano_Leg-B_Co_NA_Pur_Patrice      47_Ano_Leg-C_Co_er_Vie_PaulErmite 
##                                      7                                      7 
##         48_Ano_Leg-C_Co_ev_Tra_Benoit2            49_Ano_Leg-C_NA_NA_Vie_Maur 
##                                      7                                      7 
##         50_Ano_Leg-C_NA_NA_Vie_Placide        51_Ano_Leg-C_Ma_ho_Vie_Eustache 
##                                      7                                      7 
##           52_Ano_Leg-C_Co_NA_Vie_Fursi      53_Ano_Leg-C_Vi_NA_Vie_Marguerite 
##                                      7                                      9 
##         54_Ano_Leg-C_Vi_NA_Vie_Pelagie          55_Ano_Leg-C_Co_NA_Vie_Simeon 
##                                      9                                      7 
##        56_Ano_Leg-C_Co_NA_Vie_Mamertin          57_Ano_Leg-C_Vi_NA_Vie_Julien 
##                                      9                                      7 
## 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie 
##                                      7                                      9 
##      60_Ano_Leg-B_NA_NA_NA_Antechriste 
##                                      9

Class descriptions

Features most correlated with the classification in general
# Describe the 9-class cut of the clustering on the normalised table `d`,
# then show the first 20 rows of the per-variable summary (Eta2, P-value).
maDesc <- classesDesc(
  myCAH,
  d,
  k = 9
)
head(
  maDesc$quanti.var,
  n = 20
)
##               Eta2      P-value
## decivre  0.9032276 1.076220e-22
## cesar    0.8755934 5.253827e-20
## decies   0.8640483 4.650249e-19
## cesar    0.8552058 2.181955e-18
## apostle  0.8113585 1.398326e-15
## decius   0.8073003 2.346826e-15
## lorent   0.7908430 1.716862e-14
## apostres 0.7797098 6.028987e-14
## qil      0.7620373 3.887946e-13
## que      0.7535303 9.061669e-13
## lorenc   0.7416829 2.801402e-12
## ualerien 0.7208001 1.803850e-11
## sacrefie 0.7075425 5.457799e-11
## lorenz   0.7050629 6.673653e-11
## apostre  0.7025230 8.185064e-11
## dont     0.6796929 4.734097e-10
## un       0.6774601 5.579776e-10
## u        0.6760321 6.194260e-10
## faire    0.6715006 8.600987e-10
## decier   0.6676635 1.131368e-09
# Violin plots for the six distinct features at the head of the Eta2 ranking
# printed above, arranged in a two-column grid.
#
# The plots are collected in a list rather than in variables named A..F:
# assigning to `F` overwrites R's shorthand for FALSE in the global
# environment for the rest of the session (and `C`/`D` shadow stats helpers).
#
# NOTE(review): the ranking lists "cesar" twice, so the row names of `data`
# are not unique at this point; indexing a matrix by name returns only the
# first matching row. Confirm which of the two rows is the intended one.
topFeats <- c("decivre", "cesar", "decies", "apostle", "decius", "lorent")
topFeatPlots <- lapply(topFeats, function(feat) {
  myDescPlot(
    relativeFreqs(data)[feat, , drop = FALSE],
    classes,
    type = "violinplot",
    main = feat,
    ylab = "Relative frequency",
    xlab = "",
    classlabels = classlabels
  )
})
gridExtra::grid.arrange(grobs = topFeatPlots, ncol = 2)

# Violin-type description plots for the features "hom" and "o4", split by
# `classes`; kept for the supplementary grid of feature plots.
WauFeats5 <- myDescPlot(
  relativeFreqs(data)["hom", , drop = FALSE],
  classes,
  type = "violinplot",
  main = "hom",
  ylab = "Relative frequency",
  xlab = "",
  classlabels = classlabels
)
WauFeats6 <- myDescPlot(
  relativeFreqs(data)["o4", , drop = FALSE],
  classes,
  type = "violinplot",
  main = "o4",
  ylab = "Relative frequency",
  xlab = "",
  classlabels = classlabels
)
Features and clusters (v-test, distribution,…): Wauchier

TWO WAUCHIER CLASSES

# V-test bar plots for the two clusters that contain the Wauchier texts
# (clusters 6 and 8 in the 9-class cut printed above).
# For each cluster, keep the `nfeats` largest and the `nfeats` smallest values
# of the first column of its description table.
# The titles name the cluster: with two identical titles the plots could not
# be told apart.
nfeats <- 10

values <- c(
  head(sort(maDesc$quanti$`6`[, 1], decreasing = TRUE), n = nfeats),
  head(sort(maDesc$quanti$`6`[, 1]), n = nfeats)
)
classBarplot(values, title = "V-test for Wauchier class (cluster 6)", ylab = "v-test")

values <- c(
  head(sort(maDesc$quanti$`8`[, 1], decreasing = TRUE), n = nfeats),
  head(sort(maDesc$quanti$`8`[, 1]), n = nfeats)
)
classBarplot(values, title = "V-test for Wauchier class (cluster 8)", ylab = "v-test")

Example of two main features of the Wauchier class

# Turn the cluster assignment into a labelled factor, then give the Saint
# Lambert text a level of its own ("LAMB") so it can be told apart in the plot.
class <- as.factor(classes)
levels(class) <- classlabels
levels(class) <- c(levels(class), "LAMB")
class["26_Ano_Leg-B_Ma_Ev_Vie_Lambert"] <- "LAMB"

# Transposed relative frequencies with the class factor appended as a column.
rf <- cbind(as.data.frame(t(relativeFreqs(data))), class)

# Keep the `class` column when narrowing the table. Without it, `colour =
# class` below is no longer found in `rf` and silently falls back to the
# global variable of the same name, which only works while both happen to be
# in the same row order.
rf <- rf[, c("hom", "pastor", "que", "class")]

# Two main features of the Wauchier class, coloured by class.
qplot(hom, pastor, colour = class, data = rf)

Specificities
#TODO: fix to take only the one that have been actually selected by the Moisl formula
# Stack the lemma and word tables and plot the specificities per class.
# k is set to 9 so that it agrees with the 9-class cut used for `classes`
# and with `classlabels`, which are applied to those 9 classes elsewhere in
# this section; the previous k = 5 did not match them.
# NOTE(review): confirm that a 5-class cut was not intended here.
data <- rbind(dataLemmas, dataWords)
specifPlot(data, myCAH, k = 9, classlabels = classlabels)

Plots and tables

Reference results on the three feature sets

# Stack the three reference dendrogram plots in a single column.
gridExtra::grid.arrange(
  plotRaw3grams,
  plotGlob2,
  plotWordsLemmas,
  ncol = 1
)

Complementary results

# Earlier version of this figure, kept for reference (disabled):
#featlabel = "features of ME ±2σ with conf. > 90%"
#A = cahPlotCol(CAHLemma, main = "A", xlab = paste( ncol(CAHLemma$data), featlabel), k = 6, lrect = -12)
# B = cahPlotCol(CAHRhyme, main = "B", xlab = paste( ncol(CAHRhyme$data), featlabel), k = 6, lrect = -7, ylab = " ")
# C = cahPlotCol(CAHAllWords, main = "C", xlab = paste( ncol(CAHAllWords$data), featlabel), k = 6, ylab = " ")
# D = cahPlotCol(CAHAffs, main = "D", xlab = paste( ncol(CAHAffs$data), featlabel), k = 6, ylab = " ")
# E = cahPlotCol(CAHPOS3gr, main = "E", xlab = paste( ncol(CAHPOS3gr$data), featlabel), k = 6, lrect = -12 , ylab = " ")
# F = cahPlotCol(CAHmfw, main = "F", k = 6, lrect = -5, ylab = " ")
# gridExtra::grid.arrange(A, B, C, D, E, F, ncol = 2)

# Six single-feature-set dendrogram plots in a two-column grid.
gridExtra::grid.arrange(
  plotAffixes,
  plotFW,
  plotFL,
  plotPOS3grams,
  plotForms,
  plotLemmas,
  ncol = 2
)

Comparison of three alternatives, FS2

# Stack the three combined-feature-set dendrogram plots in a single column.
gridExtra::grid.arrange(
  plotGlob,
  plotGlob2,
  plotGlob3,
  ncol = 1
)

Robustness

# Named list of the clusterings to compare.
cahList <- list(
  raw3grams = CAHRaw3gr,
  Affs = CAHAffs,
  FunctWords = CAHFW,
  FunctLemm = CAHFL,
  POS3gr = CAHPOS3gr,
  FWPOSandAffs = CAHGlob2,
  Forms = CAHForms,
  Lemmas = CAHLemmas,
  WordsLemmas = CAHWordsLemmas
)
#compareHC(cahList, k = 9)

# Benchmark every clustering against the raw 3-grams clustering at k = 9 and
# display the result rounded to two decimals.
benchmark <- benchmarkHC(
  CAHRaw3gr,
  cahList,
  k = 9
)
round(benchmark, digits = 2)
##                 N   AC CPMeyer CPREF
## raw3grams    1276 0.63    0.90  1.00
## Affs          774 0.65    0.90  0.86
## FunctWords    171 0.72    0.86  0.81
## FunctLemm     100 0.69    0.80  0.73
## POS3gr        328 0.68    0.81  0.68
## FWPOSandAffs 1273 0.65    0.88  0.86
## Forms         698 0.63    0.85  0.81
## Lemmas        512 0.59    0.85  0.73
## WordsLemmas  1210 0.62    0.85  0.85
# # Now with SOM
# cahSOMList = list(raw3grams = somCAHRaw3gr, Affs = somCAHAffs, FunctLemm = somCAHFL, POS3gr = somCAHPOS3gr, FLPOSandAffs = somCAHGlob, FWPOSandAffs = somCAHGlob2, FLFWPOSandAffs = somCAHGlob3, Forms = somCAHForms,  Lemmas = somCAHLemmas, WordsLemmas = somCAHWordsLemmas, UnnormFW = somCAHFW)
# 
# benchmark = benchmarkHC(CAHRaw3gr, cahSOMList, k = 9)
# round(benchmark, digits = 2)

Volatility index

Ref analyses

# Volatility computed from the three reference analyses only.
cahList <- list(
  raw3grams = CAHRaw3gr,
  FWPOSandAffs = CAHGlob2,
  WordsLemmas = CAHWordsLemmas
)

vol <- volatility(cahList, k = 9)

# Attach the text lengths (`nwords`) to the rounded volatility values,
# matching on row names and keeping every row of the volatility table.
volRef <- merge(
  round(vol, digits = 2),
  nwords,
  by = "row.names",
  all.x = TRUE,
  all.y = FALSE
)

# Display the table sorted by increasing volatility index.
volRef[order(volRef[, "V_i"]), ]
##                                 Row.names   V_i     y
## 5          05_Ano_Leg-A_Ap_NA_Vie_Jacques -0.21 17920
## 57 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne -0.08  5529
## 11            11_Ano_Leg-A_Ap_NA_Vie_Marc  0.03  1820
## 59      60_Ano_Leg-B_NA_NA_NA_Antechriste  0.11  1485
## 23           23_Ano_Leg-B_Ma_Ho_Vie_Sixte  0.17  1894
## 24         24_Ano_Leg-B_Ma_Ho_Vie_Laurent  0.17  3243
## 25       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte  0.17  2513
## 41       42_Ano_Leg-B_Vi_NA_Ass_NotreDame  0.26  3119
## 42       43_Ano_Leg-C_Vi_NA_Vie_Catherine  0.26  8877
## 43           44_Ano_Leg-C_Ap_NA_Vie_Andre  0.26  3118
## 1          00_Ano_Leg-A_Ap_Ev_Dis_Pierre1  0.33  6774
## 2          01_Ano_Leg-A_Ap_NA_Vie_Pierre2  0.33  5527
## 3             02_Ano_Leg-A_Ap_NA_Pas_Paul  0.33  4798
## 4          04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev  0.33  4955
## 12          12_Ano_Leg-A_Ma_Ho_Vie_Longin  0.33  2244
## 15         15_Ano_Leg-B_Ma_Ho_Vie_Georges  0.33  4548
## 20        20_Ano_Leg-B_Ma_Fe_Vie_Felicite  0.33  1676
## 27       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon  0.33  6565
## 44          45_Ano_Leg-C_Ap_NA_Pas_Andre2  0.33 13315
## 28         28_Ano_Leg-B_Ma_Ho_Vie_Clement  0.44  2544
## 40           41_Ano_Leg-C_Vi_NA_Vie_Irene  0.44  3145
## 56          57_Ano_Leg-C_Vi_NA_Vie_Julien  0.44  2766
## 13       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien  0.56  3539
## 14         14_Ano_Leg-B_Ma_Ho_Vie_Vincent  0.56  4838
## 16      16_Ano_Leg-B_Ma_Ho_Vie_Christophe  0.56  9122
## 17          17_Ano_Leg-B_Ma_Fe_Vie_Agathe  0.56  3109
## 18            18_Ano_Leg-B_Ma_Fe_Vie_Luce  0.56  2366
## 19           19_Ano_Leg-B_Ma_Fe_Vie_Agnes  0.56  4177
## 21       21_Ano_Leg-B_Ma_Fe_Vie_Christine  0.56  7481
## 22          22_Ano_Leg-B_Ma_Fe_Vie_Cecile  0.56  6782
## 31           32_Wau_Leg-C_Co_Ev_Vie_Brice  0.56  1385
## 34         35_Wau_Leg-C_Co_Ev_Vie_Nicolas  0.56  1960
## 37          38_Wau_Leg-C_Co_Ev_Vie_Jerome  0.56  2425
## 39          40_Wau_Leg-C_Co_Er_Vie_Alexis  0.56  4103
## 52      53_Ano_Leg-C_Vi_NA_Vie_Marguerite  0.67  1935
## 53         54_Ano_Leg-C_Vi_NA_Vie_Pelagie  0.67  1506
## 55        56_Ano_Leg-C_Co_NA_Vie_Mamertin  0.67  2202
## 58       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie  0.67  1293
## 6         06_Ano_Leg-A_Ap_NA_Vie_Matthieu  0.71  6447
## 7        07_Ano_Leg-A_Ap_NA_Vie_SimonJude  0.71  6784
## 8         08_Ano_Leg-A_Ap_NA_Vie_Philippe  0.71  1014
## 9    09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur  0.71  1356
## 10      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy  0.71  4360
## 45         46_Ano_Leg-B_Co_NA_Pur_Patrice  0.72  7872
## 46      47_Ano_Leg-C_Co_er_Vie_PaulErmite  0.72  3753
## 47         48_Ano_Leg-C_Co_ev_Tra_Benoit2  0.72  3234
## 48            49_Ano_Leg-C_NA_NA_Vie_Maur  0.72  6310
## 49         50_Ano_Leg-C_NA_NA_Vie_Placide  0.72  2783
## 50        51_Ano_Leg-C_Ma_ho_Vie_Eustache  0.72  3099
## 51           52_Ano_Leg-C_Co_NA_Vie_Fursi  0.72  2492
## 54          55_Ano_Leg-C_Co_NA_Vie_Simeon  0.72  2894
## 26         26_Ano_Leg-B_Ma_Ev_Vie_Lambert  0.78  5247
## 29          29_Wau_Leg-C_Co_Ev_Vie_Martin  0.78 14432
## 30         31_Wau_Leg-C_Co_Ev_Dia_Martin3  0.78 18971
## 32          33_Wau_Leg-C_Co_Er_Vie_Gilles  0.78  4415
## 33         34_Wau_Leg-C_Co_Ev_Vie_Martial  0.78 15255
## 35        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2  0.78 10473
## 36        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3  0.78  8379
## 38          39_Wau_Leg-C_Co_Ev_Vie_Benoit  0.78 12792
# Check for a linear relationship between text length (column 3, `y`) and the
# volatility index (column 2, `V_i`) of the reference table.
reg <- lm(volRef[, 3] ~ volRef[, 2])
summary(reg)
## 
## Call:
## lm(formula = volRef[, 3] ~ volRef[, 2])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -4430  -2828  -1627   1419  13453 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4690       1299   3.609 0.000649 ***
## volRef[, 2]     1062       2289   0.464 0.644417    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4199 on 57 degrees of freedom
## Multiple R-squared:  0.003763,   Adjusted R-squared:  -0.01371 
## F-statistic: 0.2153 on 1 and 57 DF,  p-value: 0.6444
# Scatter plot of the two regression variables with the fitted line on top.
plot(x = volRef[, 2], y = volRef[, 3])
abline(reg)

# Distribution of the volatility index: box plot, then histogram.
boxplot(volRef[, 2])

hist(volRef[, 2])

Supplementary analyses

# Volatility computed from the six supplementary (single feature set)
# analyses only; the reference analyses are left out here.
cahList <- list(
  Affs = CAHAffs,
  FunctWords = CAHFW,
  FunctLemm = CAHFL,
  POS3gr = CAHPOS3gr,
  Forms = CAHForms,
  Lemmas = CAHLemmas
)

vol <- volatility(cahList, k = 9)

# Attach the text lengths (`nwords`) to the rounded volatility values,
# matching on row names and keeping every row of the volatility table.
volSuppl <- merge(
  round(vol, digits = 2),
  nwords,
  by = "row.names",
  all.x = TRUE,
  all.y = FALSE
)

# Display the table sorted by increasing volatility index.
volSuppl[order(volSuppl[, "V_i"]), ]
##                                 Row.names   V_i     y
## 44          45_Ano_Leg-C_Ap_NA_Pas_Andre2 -0.46 13315
## 5          05_Ano_Leg-A_Ap_NA_Vie_Jacques -0.42 17920
## 11            11_Ano_Leg-A_Ap_NA_Vie_Marc -0.42  1820
## 59      60_Ano_Leg-B_NA_NA_NA_Antechriste -0.40  1485
## 28         28_Ano_Leg-B_Ma_Ho_Vie_Clement -0.38  2544
## 13       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien -0.33  3539
## 26         26_Ano_Leg-B_Ma_Ev_Vie_Lambert -0.32  5247
## 45         46_Ano_Leg-B_Co_NA_Pur_Patrice -0.31  7872
## 46      47_Ano_Leg-C_Co_er_Vie_PaulErmite -0.31  3753
## 40           41_Ano_Leg-C_Vi_NA_Vie_Irene -0.30  3145
## 8         08_Ano_Leg-A_Ap_NA_Vie_Philippe -0.27  1014
## 9    09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur -0.27  1356
## 41       42_Ano_Leg-B_Vi_NA_Ass_NotreDame -0.25  3119
## 42       43_Ano_Leg-C_Vi_NA_Vie_Catherine -0.25  8877
## 43           44_Ano_Leg-C_Ap_NA_Vie_Andre -0.25  3118
## 14         14_Ano_Leg-B_Ma_Ho_Vie_Vincent -0.23  4838
## 19           19_Ano_Leg-B_Ma_Fe_Vie_Agnes -0.23  4177
## 27       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon -0.23  6565
## 57 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne -0.22  5529
## 50        51_Ano_Leg-C_Ma_ho_Vie_Eustache -0.14  3099
## 24         24_Ano_Leg-B_Ma_Ho_Vie_Laurent -0.07  3243
## 25       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte -0.07  2513
## 23           23_Ano_Leg-B_Ma_Ho_Vie_Sixte -0.06  1894
## 49         50_Ano_Leg-C_NA_NA_Vie_Placide -0.06  2783
## 22          22_Ano_Leg-B_Ma_Fe_Vie_Cecile -0.02  6782
## 52      53_Ano_Leg-C_Vi_NA_Vie_Marguerite  0.00  1935
## 53         54_Ano_Leg-C_Vi_NA_Vie_Pelagie  0.00  1506
## 58       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie  0.00  1293
## 31           32_Wau_Leg-C_Co_Ev_Vie_Brice  0.03  1385
## 12          12_Ano_Leg-A_Ma_Ho_Vie_Longin  0.04  2244
## 20        20_Ano_Leg-B_Ma_Fe_Vie_Felicite  0.05  1676
## 54          55_Ano_Leg-C_Co_NA_Vie_Simeon  0.07  2894
## 55        56_Ano_Leg-C_Co_NA_Vie_Mamertin  0.09  2202
## 6         06_Ano_Leg-A_Ap_NA_Vie_Matthieu  0.10  6447
## 7        07_Ano_Leg-A_Ap_NA_Vie_SimonJude  0.10  6784
## 10      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy  0.10  4360
## 56          57_Ano_Leg-C_Vi_NA_Vie_Julien  0.11  2766
## 16      16_Ano_Leg-B_Ma_Ho_Vie_Christophe  0.14  9122
## 17          17_Ano_Leg-B_Ma_Fe_Vie_Agathe  0.14  3109
## 18            18_Ano_Leg-B_Ma_Fe_Vie_Luce  0.14  2366
## 21       21_Ano_Leg-B_Ma_Fe_Vie_Christine  0.14  7481
## 47         48_Ano_Leg-C_Co_ev_Tra_Benoit2  0.15  3234
## 48            49_Ano_Leg-C_NA_NA_Vie_Maur  0.15  6310
## 51           52_Ano_Leg-C_Co_NA_Vie_Fursi  0.15  2492
## 37          38_Wau_Leg-C_Co_Ev_Vie_Jerome  0.16  2425
## 15         15_Ano_Leg-B_Ma_Ho_Vie_Georges  0.18  4548
## 1          00_Ano_Leg-A_Ap_Ev_Dis_Pierre1  0.19  6774
## 2          01_Ano_Leg-A_Ap_NA_Vie_Pierre2  0.19  5527
## 3             02_Ano_Leg-A_Ap_NA_Pas_Paul  0.19  4798
## 4          04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev  0.19  4955
## 33         34_Wau_Leg-C_Co_Ev_Vie_Martial  0.40 15255
## 35        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2  0.40 10473
## 36        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3  0.40  8379
## 29          29_Wau_Leg-C_Co_Ev_Vie_Martin  0.50 14432
## 30         31_Wau_Leg-C_Co_Ev_Dia_Martin3  0.50 18971
## 32          33_Wau_Leg-C_Co_Er_Vie_Gilles  0.72  4415
## 34         35_Wau_Leg-C_Co_Ev_Vie_Nicolas  0.72  1960
## 38          39_Wau_Leg-C_Co_Ev_Vie_Benoit  0.72 12792
## 39          40_Wau_Leg-C_Co_Er_Vie_Alexis  0.72  4103
# Check for a linear relationship between text length (column 3, `y`) and the
# volatility index (column 2, `V_i`) of the supplementary table.
reg <- lm(volSuppl[, 3] ~ volSuppl[, 2])
summary(reg)
## 
## Call:
## lm(formula = volSuppl[, 3] ~ volSuppl[, 2])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -5578  -2964  -1196   1235  14169 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5146.0      533.6   9.644  1.4e-13 ***
## volSuppl[, 2]   3322.0     1763.5   1.884   0.0647 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4082 on 57 degrees of freedom
## Multiple R-squared:  0.0586, Adjusted R-squared:  0.04209 
## F-statistic: 3.548 on 1 and 57 DF,  p-value: 0.06471
# Scatter plot of the two regression variables with the fitted line on top.
plot(x = volSuppl[, 2], y = volSuppl[, 3])
abline(reg)

# Distribution of the volatility index: box plot, then histogram.
boxplot(volSuppl[, 2])

hist(volSuppl[, 2])

Formatted table with both

# Combine the reference and supplementary volatility tables into one table
# with one row per text: text length, reference V_i, supplementary V_i.
#
# The join is keyed on the text identifier (column "Row.names") instead of the
# positional row names of the two data frames. Joining on positions only pairs
# the right values while both tables hold the same texts in the same order; a
# keyed join cannot pair one text with another text's value.
out <- merge(
  volRef[, c("Row.names", "V_i", "y")],
  volSuppl[, c("Row.names", "V_i")],
  by = "Row.names",
  all = TRUE,
  suffixes = c("Ref", "Suppl")
)
rownames(out) <- out[, "Row.names"]

# Columns are selected by name so that the layout of the merge result cannot
# silently shift them.
out <- out[, c("y", "V_iRef", "V_iSuppl")]
colnames(out) <- c("NWords", "V_iRef", "V_iSuppl")

# Display sorted by increasing reference volatility (ties keep the
# text-identifier order of the merge).
out[order(out[, "V_iRef"]), ]
##                                        NWords V_iRef V_iSuppl
## 05_Ano_Leg-A_Ap_NA_Vie_Jacques          17920  -0.21    -0.42
## 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne   5529  -0.08    -0.22
## 11_Ano_Leg-A_Ap_NA_Vie_Marc              1820   0.03    -0.42
## 60_Ano_Leg-B_NA_NA_NA_Antechriste        1485   0.11    -0.40
## 23_Ano_Leg-B_Ma_Ho_Vie_Sixte             1894   0.17    -0.06
## 24_Ano_Leg-B_Ma_Ho_Vie_Laurent           3243   0.17    -0.07
## 25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte         2513   0.17    -0.07
## 42_Ano_Leg-B_Vi_NA_Ass_NotreDame         3119   0.26    -0.25
## 43_Ano_Leg-C_Vi_NA_Vie_Catherine         8877   0.26    -0.25
## 44_Ano_Leg-C_Ap_NA_Vie_Andre             3118   0.26    -0.25
## 00_Ano_Leg-A_Ap_Ev_Dis_Pierre1           6774   0.33     0.19
## 12_Ano_Leg-A_Ma_Ho_Vie_Longin            2244   0.33     0.04
## 15_Ano_Leg-B_Ma_Ho_Vie_Georges           4548   0.33     0.18
## 01_Ano_Leg-A_Ap_NA_Vie_Pierre2           5527   0.33     0.19
## 20_Ano_Leg-B_Ma_Fe_Vie_Felicite          1676   0.33     0.05
## 27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon         6565   0.33    -0.23
## 02_Ano_Leg-A_Ap_NA_Pas_Paul              4798   0.33     0.19
## 04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev           4955   0.33     0.19
## 45_Ano_Leg-C_Ap_NA_Pas_Andre2           13315   0.33    -0.46
## 28_Ano_Leg-B_Ma_Ho_Vie_Clement           2544   0.44    -0.38
## 41_Ano_Leg-C_Vi_NA_Vie_Irene             3145   0.44    -0.30
## 57_Ano_Leg-C_Vi_NA_Vie_Julien            2766   0.44     0.11
## 13_Ano_Leg-B_Ma_Ho_Vie_Sebastien         3539   0.56    -0.33
## 14_Ano_Leg-B_Ma_Ho_Vie_Vincent           4838   0.56    -0.23
## 16_Ano_Leg-B_Ma_Ho_Vie_Christophe        9122   0.56     0.14
## 17_Ano_Leg-B_Ma_Fe_Vie_Agathe            3109   0.56     0.14
## 18_Ano_Leg-B_Ma_Fe_Vie_Luce              2366   0.56     0.14
## 19_Ano_Leg-B_Ma_Fe_Vie_Agnes             4177   0.56    -0.23
## 21_Ano_Leg-B_Ma_Fe_Vie_Christine         7481   0.56     0.14
## 22_Ano_Leg-B_Ma_Fe_Vie_Cecile            6782   0.56    -0.02
## 32_Wau_Leg-C_Co_Ev_Vie_Brice             1385   0.56     0.03
## 35_Wau_Leg-C_Co_Ev_Vie_Nicolas           1960   0.56     0.72
## 38_Wau_Leg-C_Co_Ev_Vie_Jerome            2425   0.56     0.16
## 40_Wau_Leg-C_Co_Er_Vie_Alexis            4103   0.56     0.72
## 53_Ano_Leg-C_Vi_NA_Vie_Marguerite        1935   0.67     0.00
## 54_Ano_Leg-C_Vi_NA_Vie_Pelagie           1506   0.67     0.00
## 56_Ano_Leg-C_Co_NA_Vie_Mamertin          2202   0.67     0.09
## 59_Ano_Leg-C_Vi_NA_Vie_Euphrasie         1293   0.67     0.00
## 10_Ano_Leg-A_Ap_NA_Vie_Barthelemy        4360   0.71     0.10
## 06_Ano_Leg-A_Ap_NA_Vie_Matthieu          6447   0.71     0.10
## 07_Ano_Leg-A_Ap_NA_Vie_SimonJude         6784   0.71     0.10
## 08_Ano_Leg-A_Ap_NA_Vie_Philippe          1014   0.71    -0.27
## 09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur     1356   0.71    -0.27
## 46_Ano_Leg-B_Co_NA_Pur_Patrice           7872   0.72    -0.31
## 47_Ano_Leg-C_Co_er_Vie_PaulErmite        3753   0.72    -0.31
## 48_Ano_Leg-C_Co_ev_Tra_Benoit2           3234   0.72     0.15
## 49_Ano_Leg-C_NA_NA_Vie_Maur              6310   0.72     0.15
## 50_Ano_Leg-C_NA_NA_Vie_Placide           2783   0.72    -0.06
## 51_Ano_Leg-C_Ma_ho_Vie_Eustache          3099   0.72    -0.14
## 52_Ano_Leg-C_Co_NA_Vie_Fursi             2492   0.72     0.15
## 55_Ano_Leg-C_Co_NA_Vie_Simeon            2894   0.72     0.07
## 26_Ano_Leg-B_Ma_Ev_Vie_Lambert           5247   0.78    -0.32
## 29_Wau_Leg-C_Co_Ev_Vie_Martin           14432   0.78     0.50
## 31_Wau_Leg-C_Co_Ev_Dia_Martin3          18971   0.78     0.50
## 33_Wau_Leg-C_Co_Er_Vie_Gilles            4415   0.78     0.72
## 34_Wau_Leg-C_Co_Ev_Vie_Martial          15255   0.78     0.40
## 36_Wau_Leg-C_Co_Ev_Mir_Nicolas2         10473   0.78     0.40
## 37_Wau_Leg-C_Co_Ev_Tra_Nicolas3          8379   0.78     0.40
## 39_Wau_Leg-C_Co_Ev_Vie_Benoit           12792   0.78     0.72
# Long-format table for plotting: the reference and supplementary results are
# stacked, with a `type` column telling them apart.
volRegr <- rbind(
  data.frame(NWords = volRef[, 3], V_i = volRef[, 2], type = "Ref"),
  data.frame(NWords = volSuppl[, 3], V_i = volSuppl[, 2], type = "Suppl")
)


library(ggpmisc)
## 
## Attaching package: 'ggpmisc'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
# Volatility index against text length, one colour/shape per analysis type,
# with a linear fit per type and the fit's p-value and adjusted R-squared
# printed on the plot.
ggplot(
  volRegr,
  aes(NWords, V_i, shape = type, colour = type, fill = type)
) +
  geom_smooth(method = "lm") +
  geom_point(size = 3) +
  theme_bw() +
  # ggpmisc::stat_poly_eq(formula = quote(V_i) ~ quote(NWords), aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE)
  ggpmisc::stat_fit_glance(
    method = 'lm',
    aes(label = paste0('p = ', round(..p.value.., 3), " Adj. R² = ", round(..adj.r.squared.., 3)))
  )
## `geom_smooth()` using formula 'y ~ x'

With all

# Volatility over ALL analyses (reference and supplementary together).
#
# `cahList` has to be rebuilt here. It otherwise still holds the
# supplementary-only list assigned in the "Supplementary analyses" section,
# which made this section reproduce the supplementary results exactly (same
# V_i values, same regression).
# NOTE: the rendered output printed below predates this fix and must be
# regenerated.
cahList <- list(
  raw3grams = CAHRaw3gr,
  Affs = CAHAffs,
  FunctWords = CAHFW,
  FunctLemm = CAHFL,
  POS3gr = CAHPOS3gr,
  FWPOSandAffs = CAHGlob2,
  Forms = CAHForms,
  Lemmas = CAHLemmas,
  WordsLemmas = CAHWordsLemmas
)
vol <- volatility(cahList, k = 9)

# Attach the text lengths (`nwords`) to the rounded volatility values,
# matching on row names and keeping every row of the volatility table.
out <- merge(round(vol, digits = 2), nwords, by = "row.names", all.x = TRUE, all.y = FALSE)

# Display the table sorted by increasing volatility index.
out[order(out[, "V_i"]), ]
##                                 Row.names   V_i     y
## 44          45_Ano_Leg-C_Ap_NA_Pas_Andre2 -0.46 13315
## 5          05_Ano_Leg-A_Ap_NA_Vie_Jacques -0.42 17920
## 11            11_Ano_Leg-A_Ap_NA_Vie_Marc -0.42  1820
## 59      60_Ano_Leg-B_NA_NA_NA_Antechriste -0.40  1485
## 28         28_Ano_Leg-B_Ma_Ho_Vie_Clement -0.38  2544
## 13       13_Ano_Leg-B_Ma_Ho_Vie_Sebastien -0.33  3539
## 26         26_Ano_Leg-B_Ma_Ev_Vie_Lambert -0.32  5247
## 45         46_Ano_Leg-B_Co_NA_Pur_Patrice -0.31  7872
## 46      47_Ano_Leg-C_Co_er_Vie_PaulErmite -0.31  3753
## 40           41_Ano_Leg-C_Vi_NA_Vie_Irene -0.30  3145
## 8         08_Ano_Leg-A_Ap_NA_Vie_Philippe -0.27  1014
## 9    09_Ano_Leg-A_Ap_NA_Vie_JacquesMineur -0.27  1356
## 41       42_Ano_Leg-B_Vi_NA_Ass_NotreDame -0.25  3119
## 42       43_Ano_Leg-C_Vi_NA_Vie_Catherine -0.25  8877
## 43           44_Ano_Leg-C_Ap_NA_Vie_Andre -0.25  3118
## 14         14_Ano_Leg-B_Ma_Ho_Vie_Vincent -0.23  4838
## 19           19_Ano_Leg-B_Ma_Fe_Vie_Agnes -0.23  4177
## 27       27_Ano_Leg-B_Ma_Ho_Vie_Pantaleon -0.23  6565
## 57 58_Ano_Leg-C_Vi_NA_Vie_MarieEgyptienne -0.22  5529
## 50        51_Ano_Leg-C_Ma_ho_Vie_Eustache -0.14  3099
## 24         24_Ano_Leg-B_Ma_Ho_Vie_Laurent -0.07  3243
## 25       25_Ano_Leg-B_Ma_Ho_Vie_Hippolyte -0.07  2513
## 23           23_Ano_Leg-B_Ma_Ho_Vie_Sixte -0.06  1894
## 49         50_Ano_Leg-C_NA_NA_Vie_Placide -0.06  2783
## 22          22_Ano_Leg-B_Ma_Fe_Vie_Cecile -0.02  6782
## 52      53_Ano_Leg-C_Vi_NA_Vie_Marguerite  0.00  1935
## 53         54_Ano_Leg-C_Vi_NA_Vie_Pelagie  0.00  1506
## 58       59_Ano_Leg-C_Vi_NA_Vie_Euphrasie  0.00  1293
## 31           32_Wau_Leg-C_Co_Ev_Vie_Brice  0.03  1385
## 12          12_Ano_Leg-A_Ma_Ho_Vie_Longin  0.04  2244
## 20        20_Ano_Leg-B_Ma_Fe_Vie_Felicite  0.05  1676
## 54          55_Ano_Leg-C_Co_NA_Vie_Simeon  0.07  2894
## 55        56_Ano_Leg-C_Co_NA_Vie_Mamertin  0.09  2202
## 6         06_Ano_Leg-A_Ap_NA_Vie_Matthieu  0.10  6447
## 7        07_Ano_Leg-A_Ap_NA_Vie_SimonJude  0.10  6784
## 10      10_Ano_Leg-A_Ap_NA_Vie_Barthelemy  0.10  4360
## 56          57_Ano_Leg-C_Vi_NA_Vie_Julien  0.11  2766
## 16      16_Ano_Leg-B_Ma_Ho_Vie_Christophe  0.14  9122
## 17          17_Ano_Leg-B_Ma_Fe_Vie_Agathe  0.14  3109
## 18            18_Ano_Leg-B_Ma_Fe_Vie_Luce  0.14  2366
## 21       21_Ano_Leg-B_Ma_Fe_Vie_Christine  0.14  7481
## 47         48_Ano_Leg-C_Co_ev_Tra_Benoit2  0.15  3234
## 48            49_Ano_Leg-C_NA_NA_Vie_Maur  0.15  6310
## 51           52_Ano_Leg-C_Co_NA_Vie_Fursi  0.15  2492
## 37          38_Wau_Leg-C_Co_Ev_Vie_Jerome  0.16  2425
## 15         15_Ano_Leg-B_Ma_Ho_Vie_Georges  0.18  4548
## 1          00_Ano_Leg-A_Ap_Ev_Dis_Pierre1  0.19  6774
## 2          01_Ano_Leg-A_Ap_NA_Vie_Pierre2  0.19  5527
## 3             02_Ano_Leg-A_Ap_NA_Pas_Paul  0.19  4798
## 4          04_Ano_Leg-A_Ap_NA_Vie_Jean_Ev  0.19  4955
## 33         34_Wau_Leg-C_Co_Ev_Vie_Martial  0.40 15255
## 35        36_Wau_Leg-C_Co_Ev_Mir_Nicolas2  0.40 10473
## 36        37_Wau_Leg-C_Co_Ev_Tra_Nicolas3  0.40  8379
## 29          29_Wau_Leg-C_Co_Ev_Vie_Martin  0.50 14432
## 30         31_Wau_Leg-C_Co_Ev_Dia_Martin3  0.50 18971
## 32          33_Wau_Leg-C_Co_Er_Vie_Gilles  0.72  4415
## 34         35_Wau_Leg-C_Co_Ev_Vie_Nicolas  0.72  1960
## 38          39_Wau_Leg-C_Co_Ev_Vie_Benoit  0.72 12792
## 39          40_Wau_Leg-C_Co_Er_Vie_Alexis  0.72  4103
# Check for a linear relationship between text length (column 3, `y`) and the
# volatility index (column 2, `V_i`) of `out`.
reg <- lm(out[, 3] ~ out[, 2])
summary(reg)
## 
## Call:
## lm(formula = out[, 3] ~ out[, 2])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -5578  -2964  -1196   1235  14169 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5146.0      533.6   9.644  1.4e-13 ***
## out[, 2]      3322.0     1763.5   1.884   0.0647 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4082 on 57 degrees of freedom
## Multiple R-squared:  0.0586, Adjusted R-squared:  0.04209 
## F-statistic: 3.548 on 1 and 57 DF,  p-value: 0.06471
# Scatter plot of the two regression variables with the fitted line on top.
plot(x = out[, 2], y = out[, 3])
abline(reg)

# Distribution of the volatility index: box plot, then histogram.
boxplot(out[, 2])

hist(out[, 2])

Controlling for pipeline bias

# Named list of the nine clusterings obtained with the main pipeline.
RefcahList <- list(
  raw3grams = CAHRaw3gr,
  Affs = CAHAffs,
  FunctWords = CAHFW,
  FunctLemm = CAHFL,
  POS3gr = CAHPOS3gr,
  FWPOSandAffs = CAHGlob2,
  Forms = CAHForms,
  Lemmas = CAHLemmas,
  WordsLemmas = CAHWordsLemmas
)
##CAREFUL ###
####TEMPORARY FIX - DO REMOVE ME LATER#####
# The replication steps below are currently disabled.
#toKeepBis = toKeep[!toKeep == "60_Ano_Leg-B_NA_NA_NA_Antechriste"]
# Redo base results without Antechrist
#RefcahListBis = replicateAnalysis(toKeepBis, "data/transkr_raw_char3grams.csv", "data/transkr_expanded_words.csv", "data/transkr_pos3-gr.csv", "data/transkr_lemmas.csv", functionWords, functionLemmas)
# 1. get Students analysis list
#StudentsResults = replicateAnalysis(toKeepBis, "data/transkr_student_raw_char3grams.csv", "data/transkr_student_expanded_words.csv", "data/transkr_student_pos3-gr.csv", "data/transkr_student_lemmas.csv", functionWords, functionLemmas)

Supplementary material

Violin plots features

# All stored Wauchier feature violin plots in a two-column grid.
gridExtra::grid.arrange(
  WauFeats1,
  WauFeats2a,
  WauFeats2b,
  WauFeats3,
  WauFeats4,
  WauFeats5,
  WauFeats6,
  ncol = 2
)